#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import re
import requests
import sys


# Captures the novel title from the text ``g_lnovel_name = '...'`` in the
# index page. Kept greedy so a title that itself contains an apostrophe is
# still captured up to the last quote on the line.
PAT_TITLE = re.compile(r"g_lnovel_name = '(.*)'")

# Captures the path of every chapter link, without the leading slash and
# without the ``.txt`` suffix. ``[^"]*`` keeps each match inside a single
# href attribute (a greedy ``.*`` would merge several links that share a
# line into one bogus capture), and the dot before ``txt`` is escaped so it
# only matches a literal ``.``.
PAT_LINKS = re.compile(r'<a href="/([^"]*)\.txt">')

def request(url, timeout=30):
    """Fetch *url* with a browser-like User-Agent and return the raw body.

    Args:
        url: Address to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The undecoded response body (``Response.content``).

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        requests.exceptions.RequestException: Any other transport failure,
            including a timeout.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Ubuntu Chrome/37.0.2062.94 Safari/537.36'
        ),
    }

    r = requests.get(url, headers=headers, timeout=timeout)

    # Fail loudly on an error status instead of handing an error page back
    # to the caller as if it were real content.
    r.raise_for_status()

    return r.content


def get_links(html):
    """Return the absolute chapter URLs referenced by *html*.

    Every path captured by ``PAT_LINKS`` is turned into a
    ``http://xs.dmzj.com/<path>.txt`` address; the order in which the links
    occur in the page is preserved.
    """
    chapter_urls = []
    for path in PAT_LINKS.findall(html):
        chapter_urls.append('http://xs.dmzj.com/{}.txt'.format(path))
    return chapter_urls


def get_title(html):
    """Return the novel title found in *html*.

    The title is the first group captured by ``PAT_TITLE``; when the page
    contains no match the placeholder ``"NoTitle"`` is returned instead.
    """
    found = PAT_TITLE.search(html)
    if found is None:
        return "NoTitle"
    return found.group(1)


def get_novel(url):
    """Download every chapter linked from *url* into ``<title>.txt``.

    The index page at *url* is fetched, its title and chapter links are
    extracted, and the body of each chapter is appended, in page order, to a
    single file in the current working directory. Progress is printed to
    standard output.

    Args:
        url: Address of the novel's index page.
    """
    html = request(url)
    title = get_title(html)

    # Single-argument, parenthesised prints produce the same output whether
    # ``print`` is a statement or a function.
    print('getting {} ...'.format(title))

    links = get_links(html)

    print('get {} links'.format(len(links)))

    # The title comes straight from the remote page. A path separator in it
    # (e.g. "Fate/Zero") would make open() fail or write outside the current
    # directory, so neutralise separators before building the file name.
    safe_title = title
    for sep in (os.sep, os.altsep):
        if sep:
            safe_title = safe_title.replace(sep, '_')

    # Binary mode: chapter bodies are written exactly as received.
    with open('{}.txt'.format(safe_title), 'wb') as f:
        for link in links:
            print('getting {} ...'.format(link))
            f.write(request(link))



def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument vector, program name first. Defaults to ``sys.argv``.

    Prints a usage message and returns when no URL was supplied; otherwise
    downloads the novel at ``argv[1]``.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("get novel from xs.dmzj.com")
        print("")
        print("Usage: %s url" % argv[0])
        return

    get_novel(argv[1])


# Only run when executed as a script, so importing this module has no side
# effects (no argument parsing, no exit, no download).
if __name__ == '__main__':
    main()

